


<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Frequently Asked Questions &mdash; PyTorch master documentation</title>
  

  
  
  
  
    <link rel="canonical" href="https://pytorch.org/docs/stable/notes/faq.html"/>
  

  

  
  
    

  

  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <!-- <link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> -->
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/jit.css" type="text/css" />
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.11.1/dist/katex.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/katex-math.css" type="text/css" />
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="Features for large-scale deployments" href="large_scale_deployments.html" />
    <link rel="prev" title="Extending PyTorch" href="extending.html" /> 

  
  <script src="../_static/js/modernizr.min.js"></script>

  <!-- Preload the theme fonts -->

<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-book.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium-italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2" as="font" type="font/woff2" crossorigin="anonymous">

<!-- Preload the katex fonts -->

<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Math-Italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size1-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size4-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size2-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size3-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Caligraphic-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
</head>

<div class="container-fluid header-holder tutorials-header" id="header-holder">
  <div class="container">
    <div class="header-container">
      <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>

      <div class="main-menu">
        <ul>
          <li>
            <a href="https://pytorch.org/get-started">Get Started</a>
          </li>

          <li>
            <div class="ecosystem-dropdown">
              <a id="dropdownMenuButton" data-toggle="ecosystem-dropdown">
                Ecosystem
              </a>
              <div class="ecosystem-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/hub"">
                  <span class=dropdown-title>Models (Beta)</span>
                  <p>Discover, publish, and reuse pre-trained models</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/ecosystem">
                  <span class=dropdown-title>Tools & Libraries</span>
                  <p>Explore the ecosystem of tools and libraries</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <a href="https://pytorch.org/mobile">Mobile</a>
          </li>

          <li>
            <a href="https://pytorch.org/blog/">Blog</a>
          </li>

          <li>
            <a href="https://pytorch.org/tutorials">Tutorials</a>
          </li>

          <li class="active">
            <a href="https://pytorch.org/docs/stable/index.html">Docs</a>
          </li>

          <li>
            <div class="resources-dropdown">
              <a id="resourcesDropdownButton" data-toggle="resources-dropdown">
                Resources
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/resources"">
                  <span class=dropdown-title>Developer Resources</span>
                  <p>Find resources and get questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/features">
                  <span class=dropdown-title>About</span>
                  <p>Learn about PyTorch’s features and capabilities</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <a href="https://github.com/pytorch/pytorch">Github</a>
          </li>
        </ul>
      </div>

      <a class="main-menu-open-button" href="#" data-behavior="open-mobile-menu"></a>
    </div>

  </div>
</div>


<body class="pytorch-body">

   

    

    <div class="table-of-contents-link-wrapper">
      <span>Table of Contents</span>
      <a href="#" class="toggle-table-of-contents" data-behavior="toggle-table-of-contents"></a>
    </div>

    <nav data-toggle="wy-nav-shift" class="pytorch-left-menu" id="pytorch-left-menu">
      <div class="pytorch-side-scroll">
        <div class="pytorch-menu pytorch-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          <div class="pytorch-left-menu-search">
            

            
              
              
                <div class="version">
                  master (1.5.0 )
                </div>
              
            

            


  


<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search Docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

            
          </div>

          
<div>
  <a style="color:#F05732" href="https://pytorch.org/docs/stable/notes/faq.html">
    You are viewing unstable developer preview docs.
    Click here to view docs for latest stable release.
  </a>
</div>

            
            
              
            
            
              <p class="caption"><span class="caption-text">Notes</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="amp_examples.html">Automatic Mixed Precision examples</a></li>
<li class="toctree-l1"><a class="reference internal" href="autograd.html">Autograd mechanics</a></li>
<li class="toctree-l1"><a class="reference internal" href="broadcasting.html">Broadcasting semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="cpu_threading_torchscript_inference.html">CPU threading and TorchScript inference</a></li>
<li class="toctree-l1"><a class="reference internal" href="cuda.html">CUDA semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="ddp.html">Distributed Data Parallel</a></li>
<li class="toctree-l1"><a class="reference internal" href="extending.html">Extending PyTorch</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="large_scale_deployments.html">Features for large-scale deployments</a></li>
<li class="toctree-l1"><a class="reference internal" href="multiprocessing.html">Multiprocessing best practices</a></li>
<li class="toctree-l1"><a class="reference internal" href="randomness.html">Reproducibility</a></li>
<li class="toctree-l1"><a class="reference internal" href="serialization.html">Serialization semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="windows.html">Windows FAQ</a></li>
</ul>
<p class="caption"><span class="caption-text">Language Bindings</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/cppdocs/">C++ API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../packages.html">Javadoc</a></li>
</ul>
<p class="caption"><span class="caption-text">Python API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../torch.html">torch</a></li>
<li class="toctree-l1"><a class="reference internal" href="../nn.html">torch.nn</a></li>
<li class="toctree-l1"><a class="reference internal" href="../nn.functional.html">torch.nn.functional</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tensors.html">torch.Tensor</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tensor_attributes.html">Tensor Attributes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tensor_view.html">Tensor Views</a></li>
<li class="toctree-l1"><a class="reference internal" href="../autograd.html">torch.autograd</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cuda.html">torch.cuda</a></li>
<li class="toctree-l1"><a class="reference internal" href="../amp.html">torch.cuda.amp</a></li>
<li class="toctree-l1"><a class="reference internal" href="../distributed.html">torch.distributed</a></li>
<li class="toctree-l1"><a class="reference internal" href="../distributions.html">torch.distributions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../hub.html">torch.hub</a></li>
<li class="toctree-l1"><a class="reference internal" href="../jit.html">torch.jit</a></li>
<li class="toctree-l1"><a class="reference internal" href="../nn.init.html">torch.nn.init</a></li>
<li class="toctree-l1"><a class="reference internal" href="../onnx.html">torch.onnx</a></li>
<li class="toctree-l1"><a class="reference internal" href="../optim.html">torch.optim</a></li>
<li class="toctree-l1"><a class="reference internal" href="../quantization.html">Quantization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../rpc/index.html">Distributed RPC Framework</a></li>
<li class="toctree-l1"><a class="reference internal" href="../random.html">torch.random</a></li>
<li class="toctree-l1"><a class="reference internal" href="../sparse.html">torch.sparse</a></li>
<li class="toctree-l1"><a class="reference internal" href="../storage.html">torch.Storage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../bottleneck.html">torch.utils.bottleneck</a></li>
<li class="toctree-l1"><a class="reference internal" href="../checkpoint.html">torch.utils.checkpoint</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cpp_extension.html">torch.utils.cpp_extension</a></li>
<li class="toctree-l1"><a class="reference internal" href="../data.html">torch.utils.data</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dlpack.html">torch.utils.dlpack</a></li>
<li class="toctree-l1"><a class="reference internal" href="../model_zoo.html">torch.utils.model_zoo</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tensorboard.html">torch.utils.tensorboard</a></li>
<li class="toctree-l1"><a class="reference internal" href="../type_info.html">Type Info</a></li>
<li class="toctree-l1"><a class="reference internal" href="../named_tensor.html">Named Tensors</a></li>
<li class="toctree-l1"><a class="reference internal" href="../name_inference.html">Named Tensors operator coverage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../__config__.html">torch.__config__</a></li>
</ul>
<p class="caption"><span class="caption-text">Libraries</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/audio">torchaudio</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/text">torchtext</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/elastic/">TorchElastic</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/serve">TorchServe</a></li>
<li class="toctree-l1"><a class="reference external" href="http://pytorch.org/xla/">PyTorch on XLA Devices</a></li>
</ul>
<p class="caption"><span class="caption-text">Community</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../community/contribution_guide.html">PyTorch Contribution Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../community/governance.html">PyTorch Governance</a></li>
<li class="toctree-l1"><a class="reference internal" href="../community/persons_of_interest.html">PyTorch Governance | Persons of Interest</a></li>
</ul>

            
          

        </div>
      </div>
    </nav>

    <div class="pytorch-container">
      <div class="pytorch-page-level-bar" id="pytorch-page-level-bar">
        <div class="pytorch-breadcrumbs-wrapper">
          















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="pytorch-breadcrumbs">
    
      <li>
        <a href="../index.html">
          
            Docs
          
        </a> &gt;
      </li>

        
      <li>Frequently Asked Questions</li>
    
    
      <li class="pytorch-breadcrumbs-aside">
        
            
            <a href="../_sources/notes/faq.rst.txt" rel="nofollow"><img src="../_static/images/view-page-source-icon.svg"></a>
          
        
      </li>
    
  </ul>

  
</div>
        </div>

        <div class="pytorch-shortcuts-wrapper" id="pytorch-shortcuts-wrapper">
          Shortcuts
        </div>
      </div>

      <section data-toggle="wy-nav-shift" id="pytorch-content-wrap" class="pytorch-content-wrap">
        <div class="pytorch-content-left">

        
          
          <div class="rst-content">
          
            <div role="main" class="main-content" itemscope="itemscope" itemtype="http://schema.org/Article">
             <article itemprop="articleBody" id="pytorch-article" class="pytorch-article">
              
  <div class="section" id="frequently-asked-questions">
<h1>Frequently Asked Questions<a class="headerlink" href="#frequently-asked-questions" title="Permalink to this headline">¶</a></h1>
<div class="section" id="my-model-reports-cuda-runtime-error-2-out-of-memory">
<h2>My model reports “cuda runtime error(2): out of memory”<a class="headerlink" href="#my-model-reports-cuda-runtime-error-2-out-of-memory" title="Permalink to this headline">¶</a></h2>
<p>As the error message suggests, you have run out of memory on your
GPU.  Since we often deal with large amounts of data in PyTorch,
small mistakes can rapidly cause your program to use up all of your
GPU; fortunately, the fixes in these cases are often simple.
Here are a few common things to check:</p>
<p><strong>Don’t accumulate history across your training loop.</strong>
By default, computations involving variables that require gradients
will keep history.  This means that you should avoid using such
variables in computations which will live beyond your training loops,
e.g., when tracking statistics. Instead, you should detach the variable
or access its underlying data.</p>
<p>Sometimes, it can be non-obvious when differentiable variables can
occur.  Consider the following training loop (abridged from <a class="reference external" href="https://discuss.pytorch.org/t/high-memory-usage-while-training/162">source</a>):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">total_loss</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10000</span><span class="p">):</span>
    <span class="n">optimizer</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
    <span class="n">output</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
    <span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">output</span><span class="p">)</span>
    <span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
    <span class="n">optimizer</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
    <span class="n">total_loss</span> <span class="o">+=</span> <span class="n">loss</span>
</pre></div>
</div>
<p>Here, <code class="docutils literal notranslate"><span class="pre">total_loss</span></code> is accumulating history across your training loop, since
<code class="docutils literal notranslate"><span class="pre">loss</span></code> is a differentiable variable with autograd history. You can fix this by
writing <cite>total_loss += float(loss)</cite> instead.</p>
<p>Other instances of this problem:
<a class="reference external" href="https://discuss.pytorch.org/t/resolved-gpu-out-of-memory-error-with-batch-size-1/3719">1</a>.</p>
<p><strong>Don’t hold onto tensors and variables you don’t need.</strong>
If you assign a Tensor or Variable to a local, Python will not
deallocate until the local goes out of scope.  You can free
this reference by using <code class="docutils literal notranslate"><span class="pre">del</span> <span class="pre">x</span></code>.  Similarly, if you assign
a Tensor or Variable to a member variable of an object, it will
not deallocate until the object goes out of scope.  You will
get the best memory usage if you don’t hold onto temporaries
you don’t need.</p>
<p>The scopes of locals can be larger than you expect.  For example:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">):</span>
    <span class="n">intermediate</span> <span class="o">=</span> <span class="n">f</span><span class="p">(</span><span class="nb">input</span><span class="p">[</span><span class="n">i</span><span class="p">])</span>
    <span class="n">result</span> <span class="o">+=</span> <span class="n">g</span><span class="p">(</span><span class="n">intermediate</span><span class="p">)</span>
<span class="n">output</span> <span class="o">=</span> <span class="n">h</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="k">return</span> <span class="n">output</span>
</pre></div>
</div>
<p>Here, <code class="docutils literal notranslate"><span class="pre">intermediate</span></code> remains live even while <code class="docutils literal notranslate"><span class="pre">h</span></code> is executing,
because its scope extrudes past the end of the loop.  To free it
earlier, you should <code class="docutils literal notranslate"><span class="pre">del</span> <span class="pre">intermediate</span></code> when you are done with it.</p>
<p><strong>Don’t run RNNs on sequences that are too large.</strong>
The amount of memory required to backpropagate through an RNN scales
linearly with the length of the RNN input; thus, you will run out of memory
if you try to feed an RNN a sequence that is too long.</p>
<p>The technical term for this phenomenon is <a class="reference external" href="https://en.wikipedia.org/wiki/Backpropagation_through_time">backpropagation through time</a>,
and there are plenty of references for how to implement truncated
BPTT, including in the <a class="reference external" href="https://github.com/pytorch/examples/tree/master/word_language_model">word language model</a> example; truncation is handled by the
<code class="docutils literal notranslate"><span class="pre">repackage</span></code> function as described in
<a class="reference external" href="https://discuss.pytorch.org/t/help-clarifying-repackage-hidden-in-word-language-model/226">this forum post</a>.</p>
<p><strong>Don’t use linear layers that are too large.</strong>
A linear layer <code class="docutils literal notranslate"><span class="pre">nn.Linear(m,</span> <span class="pre">n)</span></code> uses <span class="math"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>O</mi><mo stretchy="false">(</mo><mi>n</mi><mi>m</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">O(nm)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault" style="margin-right:0.02778em;">O</span><span class="mopen">(</span><span class="mord mathdefault">n</span><span class="mord mathdefault">m</span><span class="mclose">)</span></span></span></span>

</span> memory: that is to say,
the memory requirements of the weights
scales quadratically with the number of features.  It is very easy
to <a class="reference external" href="https://github.com/pytorch/pytorch/issues/958">blow through your memory</a>
this way (and remember that you will need at least twice the size of the
weights, since you also need to store the gradients.)</p>
</div>
<div class="section" id="my-gpu-memory-isn-t-freed-properly">
<h2>My GPU memory isn’t freed properly<a class="headerlink" href="#my-gpu-memory-isn-t-freed-properly" title="Permalink to this headline">¶</a></h2>
<p>PyTorch uses a caching memory allocator to speed up memory allocations. As a
result, the values shown in <code class="docutils literal notranslate"><span class="pre">nvidia-smi</span></code> usually don’t reflect the true
memory usage. See <a class="reference internal" href="cuda.html#cuda-memory-management"><span class="std std-ref">Memory management</span></a> for more details about GPU
memory management.</p>
<p>If your GPU memory isn’t freed even after Python quits, it is very likely that
some Python subprocesses are still alive. You may find them via
<code class="docutils literal notranslate"><span class="pre">ps</span> <span class="pre">-elf</span> <span class="pre">|</span> <span class="pre">grep</span> <span class="pre">python</span></code> and manually kill them with <code class="docutils literal notranslate"><span class="pre">kill</span> <span class="pre">-9</span> <span class="pre">[pid]</span></code>.</p>
</div>
<div class="section" id="my-data-loader-workers-return-identical-random-numbers">
<span id="dataloader-workers-random-seed"></span><h2>My data loader workers return identical random numbers<a class="headerlink" href="#my-data-loader-workers-return-identical-random-numbers" title="Permalink to this headline">¶</a></h2>
<p>You are likely using other libraries to generate random numbers in the dataset.
For example, NumPy’s RNG is duplicated when worker subprocesses are started via
<code class="docutils literal notranslate"><span class="pre">fork</span></code>. See <a class="reference internal" href="../data.html#torch.utils.data.DataLoader" title="torch.utils.data.DataLoader"><code class="xref py py-class docutils literal notranslate"><span class="pre">torch.utils.data.DataLoader</span></code></a>’s documentation for how to
properly set up random seeds in workers with its <code class="xref py py-attr docutils literal notranslate"><span class="pre">worker_init_fn</span></code> option.</p>
</div>
<div class="section" id="my-recurrent-network-doesn-t-work-with-data-parallelism">
<span id="pack-rnn-unpack-with-data-parallelism"></span><h2>My recurrent network doesn’t work with data parallelism<a class="headerlink" href="#my-recurrent-network-doesn-t-work-with-data-parallelism" title="Permalink to this headline">¶</a></h2>
<p>There is a subtlety in using the
<code class="docutils literal notranslate"><span class="pre">pack</span> <span class="pre">sequence</span> <span class="pre">-&gt;</span> <span class="pre">recurrent</span> <span class="pre">network</span> <span class="pre">-&gt;</span> <span class="pre">unpack</span> <span class="pre">sequence</span></code> pattern in a
<a class="reference internal" href="../nn.html#torch.nn.Module" title="torch.nn.Module"><code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></a> with <a class="reference internal" href="../nn.html#torch.nn.DataParallel" title="torch.nn.DataParallel"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataParallel</span></code></a> or
<a class="reference internal" href="../nn.functional.html#torch.nn.parallel.data_parallel" title="torch.nn.parallel.data_parallel"><code class="xref py py-func docutils literal notranslate"><span class="pre">data_parallel()</span></code></a>. Input to each the <code class="xref py py-meth docutils literal notranslate"><span class="pre">forward()</span></code> on
each device will only be part of the entire input. Because the unpack operation
<a class="reference internal" href="../nn.html#torch.nn.utils.rnn.pad_packed_sequence" title="torch.nn.utils.rnn.pad_packed_sequence"><code class="xref py py-func docutils literal notranslate"><span class="pre">torch.nn.utils.rnn.pad_packed_sequence()</span></code></a> by default only pads up to the
longest input it sees, i.e., the longest on that particular device, size
mismatches will happen when results are gathered together. Therefore, you can
instead take advantage of the <code class="xref py py-attr docutils literal notranslate"><span class="pre">total_length</span></code> argument of
<a class="reference internal" href="../nn.html#torch.nn.utils.rnn.pad_packed_sequence" title="torch.nn.utils.rnn.pad_packed_sequence"><code class="xref py py-func docutils literal notranslate"><span class="pre">pad_packed_sequence()</span></code></a> to make sure that the
<code class="xref py py-meth docutils literal notranslate"><span class="pre">forward()</span></code> calls return sequences of same length. For example, you can
write:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">torch.nn.utils.rnn</span> <span class="kn">import</span> <span class="n">pack_padded_sequence</span><span class="p">,</span> <span class="n">pad_packed_sequence</span>

<span class="k">class</span> <span class="nc">MyModule</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
    <span class="c1"># ... __init__, other methods, etc.</span>

    <span class="c1"># padded_input is of shape [B x T x *] (batch_first mode) and contains</span>
    <span class="c1"># the sequences sorted by lengths</span>
    <span class="c1">#   B is the batch size</span>
    <span class="c1">#   T is max sequence length</span>
    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">padded_input</span><span class="p">,</span> <span class="n">input_lengths</span><span class="p">):</span>
        <span class="n">total_length</span> <span class="o">=</span> <span class="n">padded_input</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>  <span class="c1"># get the max sequence length</span>
        <span class="n">packed_input</span> <span class="o">=</span> <span class="n">pack_padded_sequence</span><span class="p">(</span><span class="n">padded_input</span><span class="p">,</span> <span class="n">input_lengths</span><span class="p">,</span>
                                            <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
        <span class="n">packed_output</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">my_lstm</span><span class="p">(</span><span class="n">packed_input</span><span class="p">)</span>
        <span class="n">output</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">pad_packed_sequence</span><span class="p">(</span><span class="n">packed_output</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
                                        <span class="n">total_length</span><span class="o">=</span><span class="n">total_length</span><span class="p">)</span>
        <span class="k">return</span> <span class="n">output</span>


<span class="n">m</span> <span class="o">=</span> <span class="n">MyModule</span><span class="p">()</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
<span class="n">dp_m</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">DataParallel</span><span class="p">(</span><span class="n">m</span><span class="p">)</span>
</pre></div>
</div>
<p>Additionally, extra care needs to be taken when batch dimension is dim <code class="docutils literal notranslate"><span class="pre">1</span></code>
(i.e., <code class="docutils literal notranslate"><span class="pre">batch_first=False</span></code>) with data parallelism. In this case, the first
argument of pack_padded_sequence <code class="docutils literal notranslate"><span class="pre">padding_input</span></code> will be of shape
<code class="docutils literal notranslate"><span class="pre">[T</span> <span class="pre">x</span> <span class="pre">B</span> <span class="pre">x</span> <span class="pre">*]</span></code> and should be scattered along dim <code class="docutils literal notranslate"><span class="pre">1</span></code>, but the second argument
<code class="docutils literal notranslate"><span class="pre">input_lengths</span></code> will be of shape <code class="docutils literal notranslate"><span class="pre">[B]</span></code> and should be scattered along dim
<code class="docutils literal notranslate"><span class="pre">0</span></code>. Extra code to manipulate the tensor shapes will be needed.</p>
</div>
</div>


             </article>
             
            </div>
            <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="large_scale_deployments.html" class="btn btn-neutral float-right" title="Features for large-scale deployments" accesskey="n" rel="next">Next <img src="../_static/images/chevron-right-orange.svg" class="next-page"></a>
      
      
        <a href="extending.html" class="btn btn-neutral" title="Extending PyTorch" accesskey="p" rel="prev"><img src="../_static/images/chevron-right-orange.svg" class="previous-page"> Previous</a>
      
    </div>
  

  

    <hr>

  

  <div role="contentinfo">
    <p>
        &copy; Copyright 2019, Torch Contributors.

    </p>
  </div>
    
      <div>
        Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
      </div>
     

</footer>

          </div>
        </div>

        <div class="pytorch-content-right" id="pytorch-content-right">
          <div class="pytorch-right-menu" id="pytorch-right-menu">
            <div class="pytorch-side-scroll" id="pytorch-side-scroll-right">
              <ul>
<li><a class="reference internal" href="#">Frequently Asked Questions</a><ul>
<li><a class="reference internal" href="#my-model-reports-cuda-runtime-error-2-out-of-memory">My model reports “cuda runtime error(2): out of memory”</a></li>
<li><a class="reference internal" href="#my-gpu-memory-isn-t-freed-properly">My GPU memory isn’t freed properly</a></li>
<li><a class="reference internal" href="#my-data-loader-workers-return-identical-random-numbers">My data loader workers return identical random numbers</a></li>
<li><a class="reference internal" href="#my-recurrent-network-doesn-t-work-with-data-parallelism">My recurrent network doesn’t work with data parallelism</a></li>
</ul>
</li>
</ul>

            </div>
          </div>
        </div>
      </section>
    </div>

  


  

     
       <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
         <script src="../_static/jquery.js"></script>
         <script src="../_static/underscore.js"></script>
         <script src="../_static/doctools.js"></script>
         <script src="../_static/language_data.js"></script>
     

  

  <script type="text/javascript" src="../_static/js/vendor/popper.min.js"></script>
  <script type="text/javascript" src="../_static/js/vendor/bootstrap.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js"></script>
  <script type="text/javascript" src="../_static/js/theme.js"></script>

  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>
 
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-90545585-1', 'auto');
  ga('send', 'pageview');

</script>

<script async src="https://www.googletagmanager.com/gtag/js?id=UA-117752657-2"></script>

<script>
  window.dataLayer = window.dataLayer || [];

  function gtag(){dataLayer.push(arguments);}

  gtag('js', new Date());
  gtag('config', 'UA-117752657-2');
</script>

<img height="1" width="1" style="border-style:none;" alt="" src="https://www.googleadservices.com/pagead/conversion/795629140/?label=txkmCPmdtosBENSssfsC&amp;guid=ON&amp;script=0"/>


  <!-- Begin Footer -->

  <div class="container-fluid docs-tutorials-resources" id="docs-tutorials-resources">
    <div class="container">
      <div class="row">
        <div class="col-md-4 text-center">
          <h2>Docs</h2>
          <p>Access comprehensive developer documentation for PyTorch</p>
          <a class="with-right-arrow" href="https://pytorch.org/docs/stable/index.html">View Docs</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Tutorials</h2>
          <p>Get in-depth tutorials for beginners and advanced developers</p>
          <a class="with-right-arrow" href="https://pytorch.org/tutorials">View Tutorials</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Resources</h2>
          <p>Find development resources and get your questions answered</p>
          <a class="with-right-arrow" href="https://pytorch.org/resources">View Resources</a>
        </div>
      </div>
    </div>
  </div>

  <footer class="site-footer">
    <div class="container footer-container">
      <div class="footer-logo-wrapper">
        <a href="https://pytorch.org/" class="footer-logo"></a>
      </div>

      <div class="footer-links-wrapper">
        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/">PyTorch</a></li>
            <li><a href="https://pytorch.org/get-started">Get Started</a></li>
            <li><a href="https://pytorch.org/features">Features</a></li>
            <li><a href="https://pytorch.org/ecosystem">Ecosystem</a></li>
            <li><a href="https://pytorch.org/blog/">Blog</a></li>
            <li><a href="https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md">Contributing</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/resources">Resources</a></li>
            <li><a href="https://pytorch.org/tutorials">Tutorials</a></li>
            <li><a href="https://pytorch.org/docs/stable/index.html">Docs</a></li>
            <li><a href="https://discuss.pytorch.org" target="_blank">Discuss</a></li>
            <li><a href="https://github.com/pytorch/pytorch/issues" target="_blank">Github Issues</a></li>
            <li><a href="https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf" target="_blank">Brand Guidelines</a></li>
          </ul>
        </div>

        <div class="footer-links-col follow-us-col">
          <ul>
            <li class="list-title">Stay Connected</li>
            <li>
              <div id="mc_embed_signup">
                <form
                  action="https://twitter.us14.list-manage.com/subscribe/post?u=75419c71fe0a935e53dfa4a3f&id=91d0dccd39"
                  method="post"
                  id="mc-embedded-subscribe-form"
                  name="mc-embedded-subscribe-form"
                  class="email-subscribe-form validate"
                  target="_blank"
                  novalidate>
                  <div id="mc_embed_signup_scroll" class="email-subscribe-form-fields-wrapper">
                    <div class="mc-field-group">
                      <label for="mce-EMAIL" style="display:none;">Email Address</label>
                      <input type="email" value="" name="EMAIL" class="required email" id="mce-EMAIL" placeholder="Email Address">
                    </div>

                    <div id="mce-responses" class="clear">
                      <div class="response" id="mce-error-response" style="display:none"></div>
                      <div class="response" id="mce-success-response" style="display:none"></div>
                    </div>    <!-- real people should not fill this in and expect good things - do not remove this or risk form bot signups-->

                    <div style="position: absolute; left: -5000px;" aria-hidden="true"><input type="text" name="b_75419c71fe0a935e53dfa4a3f_91d0dccd39" tabindex="-1" value=""></div>

                    <div class="clear">
                      <input type="submit" value="" name="subscribe" id="mc-embedded-subscribe" class="button email-subscribe-button">
                    </div>
                  </div>
                </form>
              </div>

            </li>
          </ul>

          <div class="footer-social-icons">
            <a href="https://www.facebook.com/pytorch" target="_blank" class="facebook"></a>
            <a href="https://twitter.com/pytorch" target="_blank" class="twitter"></a>
            <a href="https://www.youtube.com/pytorch" target="_blank" class="youtube"></a>
          </div>
        </div>
      </div>
    </div>
  </footer>

  <div class="cookie-banner-wrapper">
  <div class="container">
    <p class="gdpr-notice">To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: <a href="https://www.facebook.com/policies/cookies/">Cookies Policy</a>.</p>
    <img class="close-button" src="../_static/images/pytorch-x.svg">
  </div>
</div>

  <!-- End Footer -->

  <!-- Begin Mobile Menu -->

  <div class="mobile-main-menu">
    <div class="container-fluid">
      <div class="container">
        <div class="mobile-main-menu-header-container">
          <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>
          <a class="main-menu-close-button" href="#" data-behavior="close-mobile-menu"></a>
        </div>
      </div>
    </div>

    <div class="mobile-main-menu-links-container">
      <div class="main-menu">
        <ul>
          <li>
            <a href="https://pytorch.org/get-started">Get Started</a>
          </li>

          <li>
            <a href="https://pytorch.org/features">Features</a>
          </li>

          <li>
            <a href="https://pytorch.org/ecosystem">Ecosystem</a>
          </li>

          <li>
            <a href="https://pytorch.org/mobile">Mobile</a>
          </li>

          <li>
            <a href="https://pytorch.org/hub">PyTorch Hub</a>
          </li>

          <li>
            <a href="https://pytorch.org/blog/">Blog</a>
          </li>

          <li>
            <a href="https://pytorch.org/tutorials">Tutorials</a>
          </li>

          <li class="active">
            <a href="https://pytorch.org/docs/stable/index.html">Docs</a>
          </li>

          <li>
            <a href="https://pytorch.org/resources">Resources</a>
          </li>

          <li>
            <a href="https://github.com/pytorch/pytorch">Github</a>
          </li>
        </ul>
      </div>
    </div>
  </div>

  <!-- End Mobile Menu -->

  <script type="text/javascript" src="../_static/js/vendor/anchor.min.js"></script>

  <script type="text/javascript">
    $(document).ready(function() {
      mobileMenu.bind();
      mobileTOC.bind();
      pytorchAnchors.bind();
      sideMenus.bind();
      scrollToAnchor.bind();
      highlightNavigation.bind();
      mainMenuDropdown.bind();
      filterTags.bind();

      // Remove any empty p tags that Sphinx adds
      $("[data-tags='null']").remove();

      // Add class to links that have code blocks, since we cannot create links in code blocks
      $("article.pytorch-article a span.pre").each(function(e) {
        $(this).closest("a").addClass("has-code");
      });
    })
  </script>
</body>
</html>